#!/bin/sh
#
# Runs the English training and testing pipeline end to end:
#   1. extract training attributes      -> english.train.extract.gz
#   2. detect features for CRF training -> english.train.features.gz
#   3. extract training lattices        -> english.train.cliques.gz
#   4. train the model                  -> english.model
#   5. extract testing attributes       -> english.test.extract.gz
#   6. decode the test data             -> english.labels.gz
#   7. map best labels to tags          -> english.test.tags
#
# All relative paths (../scripts, ../cc/bin, the english.* files, config and
# test_config) are resolved against the current working directory, so the
# script must be started from the directory that holds the data files.

# Stop at the first failing step (-e) and on any unset variable (-u), so that
# a later stage never runs against missing or stale output of an earlier one.
set -eu

# NOTE(review): CPUS is not referenced anywhere in this script (mpirun is
# invoked without a process count); kept for backward compatibility.
CPUS=1
DIR=$(pwd)

echo "Extracting training attributes file..."
python ../scripts/extract.py -w english.train.words -l english.train.lexemes -p -e -a -o english.train.extract.gz

echo "Detecting features for CRF training..."
# The clique count handed to train_detect is derived from the line count of
# the decompressed extract file as (lines / 3) - 1, using integer division.
# NOTE(review): this is the original author's self-described hack; the
# divisor and offset presumably mirror the layout written by extract.py --
# confirm against that script before changing them.
NUM_LINES=$(zcat english.train.extract.gz | wc -l)
NUM_CLIQUES=$((${NUM_LINES} / 3 - 1))
../cc/bin/train_detect -i english.train.extract.gz -o english.train.cliques.gz -n "${NUM_CLIQUES}" --feature_map_out english.train.features.gz

echo "Extracting training lattices using detected features..."
# Second pass over the same input: reads the feature map written by the
# previous step and writes english.train.cliques.gz again.
../cc/bin/train_detect -i english.train.extract.gz -o english.train.cliques.gz -n "${NUM_CLIQUES}" --feature_map_in english.train.features.gz

echo "Training..."
# Absolute paths are passed to the MPI programs (presumably because mpirun
# may start them in a different working directory -- TODO confirm).
mpirun ../cc/bin/st_crf -c "${DIR}/config" --vector_events "${DIR}/english.train.cliques.gz" --model_out "${DIR}/english.model"

echo "Extracting testing attributes file..."
python ../scripts/extract.py -f english.train.extract.attribute_map.gz -l english.test.lex.gz -w english.test.words.gz -e -a -o english.test.extract.gz

echo "Testing..."
mpirun ../cc/bin/st_decode --st_conf "${DIR}/test_config" -m "${DIR}/english.model" -o english.labels.gz

echo "Mapping best label to actual tags..."
python ../scripts/labels2tags.py english.train.extract.attribute_map.gz english.labels.gz > english.test.tags
